{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 1. 观察三变量+一关键词的url"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession\n",
    "# 建构 参数模板 我找的三个变量是2018互联网300强，游戏产业，薪资\n",
    "参数_compTag_平面设计师 = {\n",
    "    '2018互联网300强': {'init': ['-1'], 'headckid': ['d1e221a321324b0a'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['平面设计师'], 'compTag': ['182'],'sortFlag':['15'], 'ckid': ['1c550dac0986a6c0'], 'siTag': ['t5LT5RbKHSDTHNXx-O2GJQ%7EHjSmCnkUpSjgS7HPdUS6mw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['ac73892a8848bbf467216d5237e3ccc0'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['89b4323adff94f4d8e48ef9aebfec3f2']}, \n",
    "    '游戏产业': {'init': ['-1'], 'headckid': ['b39b6791d9396769'], 'searchType': ['1'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['平面设计师'],'sortFlag':['15'], 'ckid': ['b39b6791d9396769'], 'industries':['420'], 'siTag': ['t5LT5RbKHSDTHNXx-O2GJQ%7EfA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['d2604571179a6dc591ca3cefa863d307'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['d2604571179a6dc591ca3cefa863d307']},\n",
    "    '薪资20-30万': {'init': ['-1'], 'headckid': ['94a3cd36337460de'], 'searchType': ['1'], 'flushckid': ['1'], 'fromSearchBtn': ['2'], 'keyword': ['平面设计师'],'sortFlag':['15'], 'ckid': ['94a3cd36337460de'], 'industries':['420'], 'siTag': ['t5LT5RbKHSDTHNXx-O2GJQ%7EfA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['d015c34bffa45e8d754200e8fa2ec08e'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['d015c34bffa45e8d754200e8fa2ec08e']}\n",
    "}\n",
    "# 这里要注意，你改变变量的同时，变量所代表的数字也会改变\n",
    "# 2018互联网300强的参数数据"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 解析url参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "edu        5\n",
      "经验         5\n",
      "薪水        43\n",
      "时间        14\n",
      "职称        79\n",
      "公司地点      33\n",
      "公司名称      41\n",
      "链结        80\n",
      "公司URL     42\n",
      "热门公司类型     3\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">恺英网络</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>本科及以上</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海敬游软件科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>乐元素</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>成都零点能量信息技术有限责任公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>喜马拉雅</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>斗鱼网络</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>南京世赢乾网络科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>恺英网络</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>祖龙娱乐</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州百游信息科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>福州龙腾简合网络技术有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州千骐动漫有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>紫龙游戏</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>武汉虚咖科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>欢聚集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">上海天众体育管理有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海莉莉丝科技股份有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海黑之白信息科技有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">北京龙拳风暴科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>范特西科技</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>星河互动</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯音乐娱乐集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>理想汽车</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>猎聘</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Baidu</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>明略科技集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">新东方教育科技集团有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>携程</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州文远知行科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>硕士及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>医渡云</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>一起教育科技</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SenseTime（商汤集团）</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平安好医生</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        职称\n",
       "公司名称             edu      \n",
       "恺英网络             学历不限   12\n",
       "                 本科及以上   6\n",
       "上海敬游软件科技有限公司     大专及以上   4\n",
       "阿里巴巴             学历不限    4\n",
       "完美世界             统招本科    4\n",
       "乐元素              统招本科    4\n",
       "三七互娱             统招本科    4\n",
       "成都零点能量信息技术有限责任公司 本科及以上   4\n",
       "大搜车              统招本科    3\n",
       "喜马拉雅             本科及以上   3\n",
       "斗鱼网络             本科及以上   3\n",
       "南京世赢乾网络科技有限公司    大专及以上   2\n",
       "恺英网络             大专及以上   2\n",
       "祖龙娱乐             统招本科    2\n",
       "广州百游信息科技有限公司     学历不限    2\n",
       "福州龙腾简合网络技术有限公司   本科及以上   2\n",
       "广州千骐动漫有限公司       大专及以上   2\n",
       "紫龙游戏             本科及以上   2\n",
       "完美世界             大专及以上   2\n",
       "武汉虚咖科技有限公司       大专及以上   2\n",
       "阿里巴巴             本科及以上   2\n",
       "华为               统招本科    2\n",
       "欢聚集团             本科及以上   2\n",
       "北京涂鸦多得科技有限公司     大专及以上   2\n",
       "阅文集团             学历不限    2\n",
       "三七互娱             本科及以上   2\n",
       "上海天众体育管理有限公司     本科及以上   2\n",
       "                 统招本科    2\n",
       "上海莉莉丝科技股份有限公司    统招本科    2\n",
       "上海黑之白信息科技有限公司    本科及以上   2\n",
       "北京涂鸦多得科技有限公司     统招本科    2\n",
       "北京龙拳风暴科技有限公司     学历不限    2\n",
       "                 统招本科    2\n",
       "范特西科技            本科及以上   2\n",
       "华为               本科及以上   2\n",
       "星河互动             大专及以上   2\n",
       "腾讯               本科及以上   1\n",
       "腾讯音乐娱乐集团         统招本科    1\n",
       "理想汽车             本科及以上   1\n",
       "猎聘               本科及以上   1\n",
       "阅文集团             本科及以上   1\n",
       "Baidu            学历不限    1\n",
       "明略科技集团           统招本科    1\n",
       "新东方教育科技集团有限公司    本科及以上   1\n",
       "                 大专及以上   1\n",
       "携程               统招本科    1\n",
       "广州文远知行科技有限公司     大专及以上   1\n",
       "NIO蔚来            大专及以上   1\n",
       "大搜车              大专及以上   1\n",
       "华为               硕士及以上   1\n",
       "医渡云              本科及以上   1\n",
       "一起教育科技           统招本科    1\n",
       "SenseTime（商汤集团）  本科及以上   1\n",
       "NIO蔚来            统招本科    1\n",
       "平安好医生            学历不限    1"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "session = HTMLSession()\n",
    "payload = 参数_compTag_平面设计师['2018互联网300强']\n",
    "r = session.get( url, params = payload)\n",
    "#热门公司数据导出表格\n",
    "def requests_liepin( url, params):\n",
    "    r = session.get( url , params = payload)\n",
    "\n",
    "    # 先取特定元素, 精准打击其子后辈\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # 作为xpath字典，键为我要抓的牛肉名称，值为xpath\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "                   if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "                   else \"\" for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    # 只对主要元素下进行.xpath取值\n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    #数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return (数据)\n",
    "\n",
    "\n",
    "#    多个页面\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_compTag_平面设计师.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (热门公司类型 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "df_all\n",
    "\n",
    "#    输出\n",
    "df_all.to_excel(\"liepin_2018互联网300强.xlsx\", sheet_name=\"搜查结果\")\n",
    "\n",
    "#  Pandas  基本能力\n",
    "\n",
    "print (df_all.nunique())\n",
    "df_all[['edu']].drop_duplicates()\n",
    "\n",
    "df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "edu        5\n",
      "经验         5\n",
      "薪水        43\n",
      "时间        14\n",
      "职称        79\n",
      "公司地点      33\n",
      "公司名称      41\n",
      "链结        80\n",
      "公司URL     42\n",
      "热门公司类型     3\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">恺英网络</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>本科及以上</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海敬游软件科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>乐元素</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>成都零点能量信息技术有限责任公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>喜马拉雅</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>斗鱼网络</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>南京世赢乾网络科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>恺英网络</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>祖龙娱乐</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州百游信息科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>福州龙腾简合网络技术有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州千骐动漫有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>紫龙游戏</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>武汉虚咖科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>欢聚集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">上海天众体育管理有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海莉莉丝科技股份有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海黑之白信息科技有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">北京龙拳风暴科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>范特西科技</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>星河互动</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯音乐娱乐集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>理想汽车</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>猎聘</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Baidu</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>明略科技集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">新东方教育科技集团有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>携程</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州文远知行科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>硕士及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>医渡云</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>一起教育科技</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SenseTime（商汤集团）</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平安好医生</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        职称\n",
       "公司名称             edu      \n",
       "恺英网络             学历不限   12\n",
       "                 本科及以上   6\n",
       "上海敬游软件科技有限公司     大专及以上   4\n",
       "阿里巴巴             学历不限    4\n",
       "完美世界             统招本科    4\n",
       "乐元素              统招本科    4\n",
       "三七互娱             统招本科    4\n",
       "成都零点能量信息技术有限责任公司 本科及以上   4\n",
       "大搜车              统招本科    3\n",
       "喜马拉雅             本科及以上   3\n",
       "斗鱼网络             本科及以上   3\n",
       "南京世赢乾网络科技有限公司    大专及以上   2\n",
       "恺英网络             大专及以上   2\n",
       "祖龙娱乐             统招本科    2\n",
       "广州百游信息科技有限公司     学历不限    2\n",
       "福州龙腾简合网络技术有限公司   本科及以上   2\n",
       "广州千骐动漫有限公司       大专及以上   2\n",
       "紫龙游戏             本科及以上   2\n",
       "完美世界             大专及以上   2\n",
       "武汉虚咖科技有限公司       大专及以上   2\n",
       "阿里巴巴             本科及以上   2\n",
       "华为               统招本科    2\n",
       "欢聚集团             本科及以上   2\n",
       "北京涂鸦多得科技有限公司     大专及以上   2\n",
       "阅文集团             学历不限    2\n",
       "三七互娱             本科及以上   2\n",
       "上海天众体育管理有限公司     本科及以上   2\n",
       "                 统招本科    2\n",
       "上海莉莉丝科技股份有限公司    统招本科    2\n",
       "上海黑之白信息科技有限公司    本科及以上   2\n",
       "北京涂鸦多得科技有限公司     统招本科    2\n",
       "北京龙拳风暴科技有限公司     学历不限    2\n",
       "                 统招本科    2\n",
       "范特西科技            本科及以上   2\n",
       "华为               本科及以上   2\n",
       "星河互动             大专及以上   2\n",
       "腾讯               本科及以上   1\n",
       "腾讯音乐娱乐集团         统招本科    1\n",
       "理想汽车             本科及以上   1\n",
       "猎聘               本科及以上   1\n",
       "阅文集团             本科及以上   1\n",
       "Baidu            学历不限    1\n",
       "明略科技集团           统招本科    1\n",
       "新东方教育科技集团有限公司    本科及以上   1\n",
       "                 大专及以上   1\n",
       "携程               统招本科    1\n",
       "广州文远知行科技有限公司     大专及以上   1\n",
       "NIO蔚来            大专及以上   1\n",
       "大搜车              大专及以上   1\n",
       "华为               硕士及以上   1\n",
       "医渡云              本科及以上   1\n",
       "一起教育科技           统招本科    1\n",
       "SenseTime（商汤集团）  本科及以上   1\n",
       "NIO蔚来            统招本科    1\n",
       "平安好医生            学历不限    1"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "session = HTMLSession()\n",
    "payload = 参数_compTag_平面设计师['游戏产业']\n",
    "r = session.get( url, params = payload)\n",
    "#热门公司数据导出表格\n",
    "def requests_liepin( url, params):\n",
    "    r = session.get( url , params = payload)\n",
    "\n",
    "    # 先取特定元素, 精准打击其子后辈\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # 作为xpath字典，键为我要抓的牛肉名称，值为xpath\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "                   if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "                   else \"\" for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    # 只对主要元素下进行.xpath取值\n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    #数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return (数据)\n",
    "\n",
    "\n",
    "#    多个页面\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = list()\n",
    "for k,v in 参数_compTag_平面设计师.items():\n",
    "    payload = v\n",
    "    df = requests_liepin( url, params = payload)\n",
    "    df = df.assign (热门公司类型 = k)    \n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "df_all\n",
    "\n",
    "#    输出\n",
    "df_all.to_excel(\"liepin_游戏产业.xlsx\", sheet_name=\"搜查结果\")\n",
    "\n",
    "#  Pandas  基本能力\n",
    "\n",
    "print (df_all.nunique())\n",
    "df_all[['edu']].drop_duplicates()\n",
    "\n",
    "df_all.groupby(['公司名称','edu']).agg({\"职称\":\"count\"}).sort_values(by='职称', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "edu        5\n",
      "经验         5\n",
      "薪水        43\n",
      "时间        14\n",
      "职称        78\n",
      "公司地点      34\n",
      "公司名称      41\n",
      "链结        80\n",
      "公司URL     42\n",
      "热门公司类型     3\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>职称</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>公司名称</th>\n",
       "      <th>edu</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">恺英网络</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>本科及以上</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>乐元素</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海敬游软件科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>成都零点能量信息技术有限责任公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>斗鱼网络</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阿里巴巴</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>喜马拉雅</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>祖龙娱乐</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>南京世赢乾网络科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>恺英网络</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州百游信息科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>福州龙腾简合网络技术有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州千骐动漫有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>紫龙游戏</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>武汉虚咖科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>完美世界</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>欢聚集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>三七互娱</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">上海天众体育管理有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海莉莉丝科技股份有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>上海黑之白信息科技有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>北京涂鸦多得科技有限公司</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">北京龙拳风暴科技有限公司</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>统招本科</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>范特西科技</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>星河互动</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>腾讯音乐娱乐集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>理想汽车</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>猎聘</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>阅文集团</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Baidu</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>明略科技集团</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">新东方教育科技集团有限公司</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>携程</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广州文远知行科技有限公司</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>大搜车</th>\n",
       "      <th>大专及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>华为</th>\n",
       "      <th>硕士及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>医渡云</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>一起教育科技</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SenseTime（商汤集团）</th>\n",
       "      <th>本科及以上</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NIO蔚来</th>\n",
       "      <th>统招本科</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>平安好医生</th>\n",
       "      <th>学历不限</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        职称\n",
       "公司名称             edu      \n",
       "恺英网络             学历不限   12\n",
       "                 本科及以上   6\n",
       "完美世界             统招本科    4\n",
       "乐元素              统招本科    4\n",
       "上海敬游软件科技有限公司     大专及以上   4\n",
       "三七互娱             统招本科    4\n",
       "成都零点能量信息技术有限责任公司 本科及以上   4\n",
       "大搜车              统招本科    3\n",
       "阿里巴巴             本科及以上   3\n",
       "斗鱼网络             本科及以上   3\n",
       "阿里巴巴             学历不限    3\n",
       "喜马拉雅             本科及以上   3\n",
       "祖龙娱乐             统招本科    2\n",
       "南京世赢乾网络科技有限公司    大专及以上   2\n",
       "恺英网络             大专及以上   2\n",
       "广州百游信息科技有限公司     学历不限    2\n",
       "福州龙腾简合网络技术有限公司   本科及以上   2\n",
       "广州千骐动漫有限公司       大专及以上   2\n",
       "紫龙游戏             本科及以上   2\n",
       "武汉虚咖科技有限公司       大专及以上   2\n",
       "完美世界             大专及以上   2\n",
       "华为               统招本科    2\n",
       "欢聚集团             本科及以上   2\n",
       "北京涂鸦多得科技有限公司     大专及以上   2\n",
       "阅文集团             学历不限    2\n",
       "三七互娱             本科及以上   2\n",
       "上海天众体育管理有限公司     本科及以上   2\n",
       "                 统招本科    2\n",
       "上海莉莉丝科技股份有限公司    统招本科    2\n",
       "上海黑之白信息科技有限公司    本科及以上   2\n",
       "北京涂鸦多得科技有限公司     统招本科    2\n",
       "北京龙拳风暴科技有限公司     学历不限    2\n",
       "                 统招本科    2\n",
       "范特西科技            本科及以上   2\n",
       "华为               本科及以上   2\n",
       "星河互动             大专及以上   2\n",
       "腾讯               本科及以上   1\n",
       "腾讯音乐娱乐集团         统招本科    1\n",
       "理想汽车             本科及以上   1\n",
       "猎聘               本科及以上   1\n",
       "阅文集团             本科及以上   1\n",
       "Baidu            学历不限    1\n",
       "明略科技集团           统招本科    1\n",
       "新东方教育科技集团有限公司    本科及以上   1\n",
       "                 大专及以上   1\n",
       "携程               统招本科    1\n",
       "广州文远知行科技有限公司     大专及以上   1\n",
       "NIO蔚来            大专及以上   1\n",
       "大搜车              大专及以上   1\n",
       "华为               硕士及以上   1\n",
       "医渡云              本科及以上   1\n",
       "一起教育科技           统招本科    1\n",
       "SenseTime（商汤集团）  本科及以上   1\n",
       "NIO蔚来            统招本科    1\n",
       "平安好医生            学历不限    1"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One HTML session; requests_liepin below issues its requests through it.\n",
    "session = HTMLSession()\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "# Request the search page of the '薪资20-30万' variant.\n",
    "payload = 参数_compTag_平面设计师['薪资20-30万']\n",
    "r = session.get(url, params=payload)\n",
    "# Scrape one liepin search-result page into a table (one row per job posting).\n",
    "def requests_liepin(url, params):\n",
    "    \"\"\"Fetch a liepin search-result page and return its postings as a DataFrame.\n",
    "\n",
    "    Args:\n",
    "        url: Address of the search page to request.\n",
    "        params: Query-string parameters sent with the GET request.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with one row per <li> of the \"sojob-list\" result list\n",
    "        and one column per key of dict_xpaths; a field that is absent from a\n",
    "        row is stored as an empty string.\n",
    "    \"\"\"\n",
    "    # Send the caller's `params`. The previous body passed the notebook-level\n",
    "    # `payload` variable here, so the argument was silently ignored.\n",
    "    r = session.get(url, params=params)\n",
    "\n",
    "    # Select the result rows first; every xpath below is evaluated per row.\n",
    "    主要元素 = r.html.xpath('//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # Column name -> xpath, grouped by the way the value is extracted.\n",
    "    dict_xpaths = {\n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]',\n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title',\n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a',\n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a',\n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # Full text content of the first match in each row (\"\" when nothing matches).\n",
    "        values = []\n",
    "        for e in 主要元素:\n",
    "            matches = e.xpath(_xpath_)\n",
    "            values.append(matches[0].lxml.text_content() if matches else \"\")\n",
    "        return values\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # Concatenated, stripped text of every match in a row. A match is either\n",
    "        # a plain string (handled by the str branch) or an element with `.text`.\n",
    "        return [\n",
    "            \"\".join(x.strip() if isinstance(x, str) else x.text.strip() for x in e.xpath(_xpath_))\n",
    "            for e in 主要元素\n",
    "        ]\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # One absolute link of the first match in each row, or \"\" when the row\n",
    "        # has no match or the match carries no link. The xpath is evaluated\n",
    "        # once per row and a missing match (None) no longer raises.\n",
    "        hrefs = []\n",
    "        for e in 主要元素:\n",
    "            anchor = e.xpath(_xpath_, first=True)\n",
    "            links = anchor.absolute_links if anchor is not None else ()\n",
    "            hrefs.append(next(iter(links), \"\"))\n",
    "        return hrefs\n",
    "\n",
    "    # Extract column by column; insertion order fixes the DataFrame column order.\n",
    "    数据字典 = {k: get_e_text_content(v) for k, v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k: get_e_text(v) for k, v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k: get_e_href(v) for k, v in dict_xpaths['href'].items()})\n",
    "\n",
    "    return pd.DataFrame(数据字典)\n",
    "\n",
    "\n",
    "# Scrape every search variant and stack the result pages into one table.\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "\n",
    "list_df = []\n",
    "for k, v in 参数_compTag_平面设计师.items():\n",
    "    # `payload` is rebound at notebook scope for each variant before the call.\n",
    "    payload = v\n",
    "    # Tag every row with the name of the search variant it came from.\n",
    "    df = requests_liepin(url, params=payload).assign(热门公司类型=k)\n",
    "    list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df)\n",
    "\n",
    "# Export. NOTE(review): the workbook holds the rows of every search variant,\n",
    "# not only the one named in the file name -- confirm this is intended.\n",
    "df_all.to_excel(\"liepin_薪资20-30万.xlsx\", sheet_name=\"搜查结果\")\n",
    "\n",
    "# Basic pandas summaries: number of distinct values per column ...\n",
    "print(df_all.nunique())\n",
    "\n",
    "# ... and postings per (company, education requirement), most frequent first.\n",
    "# This is the cell's last expression, so it is the value the notebook renders.\n",
    "(\n",
    "    df_all\n",
    "    .groupby(['公司名称', 'edu'])\n",
    "    .agg({\"职称\": \"count\"})\n",
    "    .sort_values(by='职称', ascending=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. 创建payload模版"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start a session and load the search page of the '薪资20-30万' variant;\n",
    "# `r` holds the response.\n",
    "session = HTMLSession()\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "payload = 参数_compTag_平面设计师['薪资20-30万']\n",
    "r = session.get(url, params=payload)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. xpath解析页面数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'互联网/电商': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=040&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '游戏产业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=420&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '计算机软件': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=010&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " 'IT服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_01&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=030&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '电子/芯片/半导体': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=050&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '通信业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=060&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '计算机/网络设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_02&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=020&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '房地产/建筑': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=080&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '规划/设计/装潢': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=100&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '房地产服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_03&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=090&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '银行': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=130&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '保险': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=140&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '基金/证券/投资': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=150&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '会计/审计': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=430&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '信托/担保/拍卖': '/zhaopin/?subIndustry=&init=-1&industryType=industry_04&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=500&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '快消品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=190&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '批发零售': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=240&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '服装纺织': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=200&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '家具/家电': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=210&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '办公设备': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=220&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '奢侈品/收藏品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=460&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '珠宝/玩具/工艺品': '/zhaopin/?subIndustry=&init=-1&industryType=industry_05&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=470&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '汽车/摩托车': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=350&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '机械/机电/重工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=360&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '印刷/包装/造纸': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=180&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '原材料加工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=370&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '仪器/电气/自动化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_06&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=340&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '制药/生物工程': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=270&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '医疗/保健/美容': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=280&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '医疗器械': '/zhaopin/?subIndustry=&init=-1&industryType=industry_10&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=290&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '能源/水利': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=330&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '石油/化工': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=310&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '采掘/冶炼/矿产': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=320&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '环保': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=300&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '新能源': '/zhaopin/?subIndustry=&init=-1&industryType=industry_11&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=490&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '专业服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=120&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '中介服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=110&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '外包服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=440&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '检测/认证': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=450&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '餐饮/酒旅/服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=230&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '文体娱乐': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=260&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '租赁服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_07&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=510&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '广告/市场/会展': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=070&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '影视文化': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=170&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '教育培训': '/zhaopin/?subIndustry=&init=-1&industryType=industry_08&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=380&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '交通/物流/运输': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=250&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '贸易/进出口': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=160&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '航空/航天': '/zhaopin/?subIndustry=&init=-1&industryType=industry_09&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=480&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '政务/公共服务': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=390&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '农林牧渔': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=410&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e',\n",
       " '其他行业': '/zhaopin/?subIndustry=&init=-1&industryType=industry_12&headckid=8e380b04c7b51f0e&flushckid=1&fromSearchBtn=2&industries=400&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=8e380b04c7b51f0e&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=f84acacdf3add641e4fa4b9e7f96ac1e&d_curPage=0&d_pageSize=40&d_headId=f84acacdf3add641e4fa4b9e7f96ac1e'}"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/?keyword=平面设计师\"  # keyword可随意修改，只要在猎聘上可以找到就行\n",
    "session = HTMLSession()\n",
    "r = session.get(url)\n",
    "行业链接 = r.html.xpath('//ul[@class=\"clearfix\"]/li/div/a/@href')\n",
    "行业名 = r.html.xpath('//ul[@class=\"clearfix\"]/li/div/a/text()')\n",
    "行业链接选择器 = dict(zip(行业名,行业链接))\n",
    "行业链接选择器"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 5. 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"https://www.liepin.com/zhaopin/?keyword=平面设计师\"  # keyword可随意修改，只要在猎聘上可以找到就行\n",
    "session = HTMLSession()\n",
    "r = session.get( url )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[<Element 'a' href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1'>, <Element 'a' href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=2'>, <Element 'a' href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=3'>, <Element 'a' href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=4'>, <Element 'a' href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1'>, <Element 'a' class=('last',) 
href='/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=9' title='末页'>]\n",
      "{'2': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1', '3': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=2', '4': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=3', '5': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=4', '下一页': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1', '': 
'/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=9'}\n"
     ]
    }
   ],
   "source": [
    "#   xpath 解析翻页a/@href\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a' # 有disabled, current等href是javascript\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "print (r.html.xpath(xpath_翻页a)) # 物件\n",
    "\n",
    "href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "#print (href_列表)\n",
    "\n",
    "文字_列表 = [x.text for x in r.html.xpath(xpath_翻页a)]\n",
    "#print (文字_列表)\n",
    "\n",
    "href_字典 = {x.text:x.xpath('//@href')[0]  for x in r.html.xpath(xpath_翻页a)}\n",
    "print (href_字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>init=-1&amp;headckid=2691a882b6920425&amp;fromSearchBt...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  scheme netloc       path params  \\\n",
       "0                /zhaopin/          \n",
       "1                /zhaopin/          \n",
       "2                /zhaopin/          \n",
       "3                /zhaopin/          \n",
       "4                /zhaopin/          \n",
       "5                /zhaopin/          \n",
       "\n",
       "                                               query fragment  \n",
       "0  init=-1&headckid=2691a882b6920425&fromSearchBt...           \n",
       "1  init=-1&headckid=2691a882b6920425&fromSearchBt...           \n",
       "2  init=-1&headckid=2691a882b6920425&fromSearchBt...           \n",
       "3  init=-1&headckid=2691a882b6920425&fromSearchBt...           \n",
       "4  init=-1&headckid=2691a882b6920425&fromSearchBt...           \n",
       "5  init=-1&headckid=2691a882b6920425&fromSearchBt...           "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scheme      1\n",
      "netloc      1\n",
      "path        1\n",
      "params      1\n",
      "query       5\n",
      "fragment    1\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ckid</th>\n",
       "      <th>curPage</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_headId</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>fromSearchBtn</th>\n",
       "      <th>headckid</th>\n",
       "      <th>init</th>\n",
       "      <th>keyword</th>\n",
       "      <th>siTag</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>1</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>2</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>3</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>4</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>1</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2691a882b6920425°radeFlag=0</td>\n",
       "      <td>9</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>0</td>\n",
       "      <td>e004389a911f863ef81f8171fc89ac9c</td>\n",
       "      <td>40</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>2</td>\n",
       "      <td>2691a882b6920425</td>\n",
       "      <td>-1</td>\n",
       "      <td>平面设计师</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ckid curPage                            d_ckId  \\\n",
       "0  2691a882b6920425°radeFlag=0       1  e004389a911f863ef81f8171fc89ac9c   \n",
       "1  2691a882b6920425°radeFlag=0       2  e004389a911f863ef81f8171fc89ac9c   \n",
       "2  2691a882b6920425°radeFlag=0       3  e004389a911f863ef81f8171fc89ac9c   \n",
       "3  2691a882b6920425°radeFlag=0       4  e004389a911f863ef81f8171fc89ac9c   \n",
       "4  2691a882b6920425°radeFlag=0       1  e004389a911f863ef81f8171fc89ac9c   \n",
       "5  2691a882b6920425°radeFlag=0       9  e004389a911f863ef81f8171fc89ac9c   \n",
       "\n",
       "  d_curPage                          d_headId d_pageSize         d_sfrom  \\\n",
       "0         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "1         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "2         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "3         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "4         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "5         0  e004389a911f863ef81f8171fc89ac9c         40  search_unknown   \n",
       "\n",
       "  fromSearchBtn          headckid init keyword  \\\n",
       "0             2  2691a882b6920425   -1   平面设计师   \n",
       "1             2  2691a882b6920425   -1   平面设计师   \n",
       "2             2  2691a882b6920425   -1   平面设计师   \n",
       "3             2  2691a882b6920425   -1   平面设计师   \n",
       "4             2  2691a882b6920425   -1   平面设计师   \n",
       "5             2  2691a882b6920425   -1   平面设计师   \n",
       "\n",
       "                                           siTag  \n",
       "0  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  \n",
       "1  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  \n",
       "2  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  \n",
       "3  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  \n",
       "4  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  \n",
       "5  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ckid             1\n",
      "curPage          5\n",
      "d_ckId           1\n",
      "d_curPage        1\n",
      "d_headId         1\n",
      "d_pageSize       1\n",
      "d_sfrom          1\n",
      "fromSearchBtn    1\n",
      "headckid         1\n",
      "init             1\n",
      "keyword          1\n",
      "siTag            1\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# 建构参数模板：找到关键参数及参数结构\n",
    "\n",
    "# 需要模组库\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "import pandas as pd\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "# 总体目标：输入 href_列表, 建构出参数字典\n",
    "\n",
    "# urlparse 解析后丢入数据框\n",
    "df = pd.DataFrame([ urlparse(x) for x in href_列表])\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()} for x in df['query'] ])\n",
    "\n",
    "display(df)\n",
    "print(df.nunique())\n",
    "display(df_qs)\n",
    "print(df_qs.nunique())\n",
    "\n",
    "df_qs.curPage\n",
    "df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) # 变成整数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'init': ['-1'], 'headckid': ['2691a882b6920425'], 'fromSearchBtn': ['2'], 'keyword': ['平面设计师'], 'ckid': ['2691a882b6920425°radeFlag=0'], 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'], 'd_sfrom': ['search_unknown'], 'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'], 'd_curPage': ['0'], 'd_pageSize': ['40'], 'd_headId': ['e004389a911f863ef81f8171fc89ac9c'], 'curPage': ['1']}\n",
      "{'2': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1', '3': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=2', '4': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=3', '5': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=4', '下一页': '/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=1', '': 
'/zhaopin/?init=-1&headckid=2691a882b6920425&fromSearchBtn=2&keyword=%E5%B9%B3%E9%9D%A2%E8%AE%BE%E8%AE%A1%E5%B8%88&ckid=2691a882b6920425°radeFlag=0&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=e004389a911f863ef81f8171fc89ac9c&d_curPage=0&d_pageSize=40&d_headId=e004389a911f863ef81f8171fc89ac9c&curPage=9'}\n"
     ]
    }
   ],
   "source": [
    "#  建构参数模板：找到关键参数及参数结构\n",
    "\n",
    "def parse_url_qs_for_curPage (url):\n",
    "    six_parts = urlparse(url) \n",
    "    out = parse_qs(six_parts.query)\n",
    "    return (out)\n",
    "\n",
    "# 取一例做模板\n",
    "参数模板 = parse_url_qs_for_curPage(href_列表[0])\n",
    "print (参数模板)\n",
    "\n",
    "print (href_字典)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1\n",
      "9\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{0: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [0]},\n",
       " 1: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [1]},\n",
       " 2: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [2]},\n",
       " 3: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [3]},\n",
       " 4: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [4]},\n",
       " 5: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [5]},\n",
       " 6: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [6]},\n",
       " 7: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [7]},\n",
       " 8: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [8]},\n",
       " 9: {'init': ['-1'],\n",
       "  'headckid': ['2691a882b6920425'],\n",
       "  'fromSearchBtn': ['2'],\n",
       "  'keyword': ['平面设计师'],\n",
       "  'ckid': ['2691a882b6920425°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e004389a911f863ef81f8171fc89ac9c'],\n",
       "  'curPage': [9]}}"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#  建构参数模板生成器：keyword curPage\n",
    "def 参数模板生成(keyword, curPage):\n",
    "    参数 = 参数模板.copy()\n",
    "    参数['curPage'] = curPage\n",
    "    参数['keyword'] = keyword\n",
    "    return (参数)\n",
    "\n",
    "参数_keyword_平面设计师_curPage = { \n",
    "    i:参数模板生成(curPage = [i], \\\n",
    "                  keyword = ['平面设计师']) \\\n",
    "    for i,v in href_字典.items()\\\n",
    "    }\n",
    "\n",
    "# print(参数_keyword_平面设计师_curPage) # 只生成本页有的额外翻页URL, 并没有推估到&curPage=9,也没有这页\n",
    "\n",
    "print (df_qs.curPage_int.min()) # 最小值只有1\n",
    "print (df_qs.curPage_int.max()) # 最大值只有9\n",
    "\n",
    "# 应该是 0 (本页)....9(最大值)\n",
    "\n",
    "参数_keyword_平面设计师_curPage = { \n",
    "    i:参数模板生成(curPage = [i], \\\n",
    "                  keyword = ['平面设计师']) \\\n",
    "    for i in range(0,df_qs.curPage_int.max()+1)\\\n",
    "    }\n",
    "参数_keyword_平面设计师_curPage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
